; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Widening multiply of i32 lanes 0 and 2 of %src1 by a splat of %src2, both
; sign-extended to i64. The splat is formed at i32 width and extended
; afterwards; the expected output is a single vmullb.s32 whose second operand
; is a q register with r0 placed in lanes 0 and 2.
define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0246_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Same pattern as a sign-extended multiply of lanes 0 and 2 of %src1 by a
; splat of %src2 (splat formed at i32 width, then extended), but with the
; splat as the first operand of the mul. The expected output is a single
; vmullb.s32 with the splat register as its first source.
define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q1, q2, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Lanes 0 and 2 of %src1 are sign-extended to i64 and multiplied by a splat of
; %src2, where %src2 is sign-extended to i64 as a scalar *before* being
; splatted. The expected output contains no vmullb: each 64-bit product is
; computed in core registers with umull/mla and the halves are inserted into q0.
define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0246_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull lr, r12, r1, r0
; CHECK-NEXT:    umull r2, r5, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    mla r4, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r2, r3, r2, r5
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    mla r1, r1, r0, r4
; CHECK-NEXT:    mla r0, r3, r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Splat of the scalar-sign-extended %src2 (extended to i64 before splatting)
; multiplied by the sign-extended lanes 0 and 2 of %src1, with the splat as the
; first mul operand. The expected output contains no vmullb: the 64-bit
; products are built in core registers with umull/mla.
define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    asrs r4, r0, #31
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull lr, r12, r0, r1
; CHECK-NEXT:    umull r2, r5, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r0, r0, r2, r5
; CHECK-NEXT:    mla r0, r4, r3, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Widening multiply of i32 lanes 1 and 3 of %src1 by a splat of %src2, both
; sign-extended to i64 (splat formed at i32 width, then extended). The
; expected output applies vrev64.32 to the vector operand and then uses a
; single vmullb.s32.
define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_1357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r0
; CHECK-NEXT:    vrev64.32 q2, q0
; CHECK-NEXT:    vmullb.s32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Sign-extended splat of %src2 (formed at i32 width, then extended) multiplied
; by the sign-extended lanes 1 and 3 of %src1, with the splat as the first mul
; operand. The expected output applies vrev64.32 to the vector operand and
; uses a single vmullb.s32 with the splat register as its first source.
define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Lanes 1 and 3 of %src1 are sign-extended to i64 and multiplied by a splat of
; %src2 that was sign-extended to i64 as a scalar before splatting. The
; expected output contains no vmullb: after a vrev64.32 the lanes are moved to
; core registers and each 64-bit product is built with umull/mla.
define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_1357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull lr, r12, r1, r0
; CHECK-NEXT:    umull r2, r5, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    mla r4, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r2, r3, r2, r5
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    mla r1, r1, r0, r4
; CHECK-NEXT:    mla r0, r3, r0, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Splat of the scalar-sign-extended %src2 (extended to i64 before splatting)
; multiplied by the sign-extended lanes 1 and 3 of %src1, with the splat as the
; first mul operand. The expected output contains no vmullb: after a
; vrev64.32 the products are built in core registers with umull/mla.
define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    asrs r4, r0, #31
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull lr, r12, r0, r1
; CHECK-NEXT:    umull r2, r5, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r0, r0, r2, r5
; CHECK-NEXT:    mla r0, r4, r3, r0
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Lanes 0, 2, 1, 3 of the <8 x i32> %src1 are sign-extended to <4 x i64> and
; multiplied by a sign-extended splat of %src2 (splat formed at i32 width,
; then extended). The expected output uses two vmullb.s32: one on q0 directly
; and one after s1 and s3 have been copied into s0 and s2.
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q2, q0, q3
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.s32 q1, q0, q3
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out1, %out2
  ret <4 x i64> %out
}

; Sign-extended splat of %src2 (formed at i32 width, then extended) multiplied
; by the sign-extended lanes 0, 2, 1, 3 of the <8 x i32> %src1, with the splat
; as the first mul operand. The expected output uses two vmullb.s32 with the
; splat register as first source, the second after copying s1/s3 into s0/s2.
define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmullb.s32 q2, q3, q0
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.s32 q1, q3, q0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out2, %out1
  ret <4 x i64> %out
}

; Lanes 0, 2, 1, 3 of the <8 x i32> %src1 are sign-extended to <4 x i64> and
; multiplied by a splat of %src2 that was sign-extended to i64 as a scalar
; before splatting. The expected output contains no vmullb: all four 64-bit
; products are built in core registers with umull/mla and inserted into q0/q1.
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov.f32 s4, s5
; CHECK-NEXT:    vmov.f32 s6, s7
; CHECK-NEXT:    umull lr, r12, r1, r0
; CHECK-NEXT:    umull r2, r5, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    mla r4, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r5, r3, r2, r5
; CHECK-NEXT:    asrs r3, r3, #31
; CHECK-NEXT:    mla r1, r1, r0, r4
; CHECK-NEXT:    vmov r4, s4
; CHECK-NEXT:    mla r3, r3, r0, r5
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r1
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    umull r5, lr, r4, r0
; CHECK-NEXT:    umull r3, r12, r1, r0
; CHECK-NEXT:    vmov q1[2], q1[0], r5, r3
; CHECK-NEXT:    mla r3, r1, r2, r12
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    mla r2, r4, r2, lr
; CHECK-NEXT:    mla r1, r1, r0, r3
; CHECK-NEXT:    asrs r3, r4, #31
; CHECK-NEXT:    mla r0, r3, r0, r2
; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %out1, %shuf2
  ret <4 x i64> %out
}

; Splat of the scalar-sign-extended %src2 (extended to i64 before splatting)
; multiplied by the sign-extended lanes 0, 2, 1, 3 of the <8 x i32> %src1, with
; the splat as the first mul operand. The expected output contains no vmullb:
; all four 64-bit products are built in core registers with umull/mla.
define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    asrs r4, r0, #31
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    vmov.f32 s4, s5
; CHECK-NEXT:    vmov.f32 s6, s7
; CHECK-NEXT:    umull lr, r12, r0, r1
; CHECK-NEXT:    umull r2, r5, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r2, lr
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r3, #31
; CHECK-NEXT:    mla r2, r0, r2, r5
; CHECK-NEXT:    vmov r5, s4
; CHECK-NEXT:    mla r2, r4, r3, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r2, r1
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    umull r3, lr, r0, r5
; CHECK-NEXT:    umull r2, r12, r0, r1
; CHECK-NEXT:    vmov q1[2], q1[0], r3, r2
; CHECK-NEXT:    asrs r2, r1, #31
; CHECK-NEXT:    mla r2, r0, r2, r12
; CHECK-NEXT:    mla r1, r4, r1, r2
; CHECK-NEXT:    asrs r2, r5, #31
; CHECK-NEXT:    mla r0, r0, r2, lr
; CHECK-NEXT:    mla r0, r4, r5, r0
; CHECK-NEXT:    vmov q1[3], q1[1], r0, r1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %shuf2, %out1
  ret <4 x i64> %out
}

; Widening multiply of i32 lanes 0 and 2 of %src1 by a splat of %src2, both
; zero-extended to i64. The splat is formed at i32 width and extended
; afterwards; the expected output is a single vmullb.u32 whose second operand
; is a q register with r0 placed in lanes 0 and 2.
define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0246_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Zero-extended splat of %src2 (formed at i32 width, then extended) multiplied
; by the zero-extended lanes 0 and 2 of %src1, with the splat as the first mul
; operand. The expected output is a single vmullb.u32 with the splat register
; as its first source.
define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q1, q2, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Lanes 0 and 2 of %src1 are zero-extended to i64 and multiplied by a splat of
; %src2 that was zero-extended to i64 as a scalar before splatting. The
; expected output contains no vmullb: each lane is moved to a core register,
; multiplied with umull, and the result halves are inserted into q0.
define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0246_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull r1, r2, r1, r0
; CHECK-NEXT:    umull r0, r3, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Splat of the scalar-zero-extended %src2 (extended to i64 before splatting)
; multiplied by the zero-extended lanes 0 and 2 of %src1, with the splat as the
; first mul operand. The expected output contains no vmullb: the products are
; computed with two umull instructions in core registers.
define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_0246:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    umull r1, r2, r0, r1
; CHECK-NEXT:    umull r0, r3, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Widening multiply of i32 lanes 1 and 3 of %src1 by a splat of %src2, both
; zero-extended to i64 (splat formed at i32 width, then extended). The
; expected output applies vrev64.32 to the vector operand and then uses a
; single vmullb.u32.
define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_1357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r0
; CHECK-NEXT:    vrev64.32 q2, q0
; CHECK-NEXT:    vmullb.u32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; Zero-extended splat of %src2 (formed at i32 width, then extended) multiplied
; by the zero-extended lanes 1 and 3 of %src1, with the splat as the first mul
; operand. The expected output applies vrev64.32 to the vector operand and
; uses a single vmullb.u32 with the splat register as its first source.
define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov q2[2], q2[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q0, q2, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; Lanes 1 and 3 of %src1 are zero-extended to i64 and multiplied by a splat of
; %src2 that was zero-extended to i64 as a scalar before splatting. The
; expected output contains no vmullb: after a vrev64.32 the lanes are moved to
; core registers and multiplied with umull.
define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_1357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull r1, r2, r1, r0
; CHECK-NEXT:    umull r0, r3, r3, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; Splat of the scalar-zero-extended %src2 (extended to i64 before splatting)
; multiplied by the zero-extended lanes 1 and 3 of %src1, with the splat as the
; first mul operand. The expected output contains no vmullb: after a
; vrev64.32 the products are computed with two umull instructions.
define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_1357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q1, q0
; CHECK-NEXT:    vmov r1, s6
; CHECK-NEXT:    vmov r3, s4
; CHECK-NEXT:    umull r1, r2, r0, r1
; CHECK-NEXT:    umull r0, r3, r0, r3
; CHECK-NEXT:    vmov q0[2], q0[0], r0, r1
; CHECK-NEXT:    vmov q0[3], q0[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; Lanes 0, 2, 1, 3 of the <8 x i32> %src1 are zero-extended to <4 x i64> and
; multiplied by a zero-extended splat of %src2 (splat formed at i32 width,
; then extended). The expected output uses two vmullb.u32: one on q0 directly
; and one after s1 and s3 have been copied into s0 and s2.
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q2, q0, q3
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.u32 q1, q0, q3
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out1, %out2
  ret <4 x i64> %out
}

; Zero-extended splat of %src2 (formed at i32 width, then extended) multiplied
; by the zero-extended lanes 0, 2, 1, 3 of the <8 x i32> %src1, with the splat
; as the first mul operand. The expected output uses two vmullb.u32 with the
; splat register as first source, the second after copying s1/s3 into s0/s2.
define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q3[2], q3[0], r0, r0
; CHECK-NEXT:    vmullb.u32 q2, q3, q0
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmullb.u32 q1, q3, q0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out2, %out1
  ret <4 x i64> %out
}

; Lanes 0, 2, 1, 3 of the <8 x i32> %src1 are zero-extended to <4 x i64> and
; multiplied by a splat of %src2 that was zero-extended to i64 as a scalar
; before splatting. The expected output contains no vmullb: the four products
; are computed with umull in core registers and inserted into q0/q1.
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    umull r1, r12, r1, r0
; CHECK-NEXT:    umull r3, r2, r3, r0
; CHECK-NEXT:    vmov q2[2], q2[0], r3, r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov q2[3], q2[1], r2, r12
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    umull r1, r2, r1, r0
; CHECK-NEXT:    umull r0, r3, r3, r0
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %out1, %shuf2
  ret <4 x i64> %out
}

; Splat of the scalar-zero-extended %src2 (extended to i64 before splatting)
; multiplied by the zero-extended lanes 0, 2, 1, 3 of the <8 x i32> %src1, with
; the splat as the first mul operand. The expected output contains no vmullb:
; the four products are computed with umull in core registers.
define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_0213:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    umull r1, r12, r0, r1
; CHECK-NEXT:    umull r3, r2, r0, r3
; CHECK-NEXT:    vmov q2[2], q2[0], r3, r1
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov q2[3], q2[1], r2, r12
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    umull r1, r2, r0, r1
; CHECK-NEXT:    umull r0, r3, r0, r3
; CHECK-NEXT:    vmov q1[2], q1[0], r0, r1
; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %shuf2, %out1
  ret <4 x i64> %out
}

; Widening multiply of i16 lanes 0, 2, 4, 6 of %src1 by a splat of %src2, both
; sign-extended to i32. The splat is formed at i16 width and extended
; afterwards; the expected output is a vdup.32 of r0 followed by a single
; vmullb.s16.
define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02468101214_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Sign-extended splat of %src2 (formed at i16 width, then extended) multiplied
; by the sign-extended lanes 0, 2, 4, 6 of %src1, with the splat as the first
; mul operand. The expected output is a vdup.32 of r0 followed by a single
; vmullb.s16 with the splat register as its first source.
define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.s16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Lanes 0, 2, 4, 6 of %src1 are sign-extended to i32 and multiplied by a splat
; of %src2 that was sign-extended to i32 as a scalar before splatting. The
; expected output does not use vmullb: the vector is extended with vmovlb.s16,
; the scalar with sxth, and the product is a vmul.i32 with a scalar operand.
define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02468101214_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Splat of the scalar-sign-extended %src2 (extended to i32 before splatting)
; multiplied by the sign-extended lanes 0, 2, 4, 6 of %src1, with the splat as
; the first mul operand. The expected output does not use vmullb: it is
; vmovlb.s16 on the vector, sxth on the scalar, then vmul.i32 by r0.
define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Widening multiply of i16 lanes 1, 3, 5, 7 of %src1 by a splat of %src2, both
; sign-extended to i32 (splat formed at i16 width, then extended). The
; expected output applies vrev32.16 to the vector operand and then uses a
; single vmullb.s16 against a vdup.32 of r0.
define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_13579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vmullb.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Sign-extended splat of %src2 (formed at i16 width, then extended) multiplied
; by the sign-extended lanes 1, 3, 5, 7 of %src1, with the splat as the first
; mul operand. The expected output applies vrev32.16 to the vector operand and
; uses a single vmullb.s16 with the vdup.32 register as its first source.
define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.s16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Lanes 1, 3, 5, 7 of %src1 are sign-extended to i32 and multiplied by a splat
; of %src2 that was sign-extended to i32 as a scalar before splatting. The
; expected output does not use vmullb: the vector is extended with vmovlt.s16,
; the scalar with sxth, and the product is a vmul.i32 with a scalar operand.
define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_13579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Splat of the scalar-sign-extended %src2 (extended to i32 before splatting)
; multiplied by the sign-extended lanes 1, 3, 5, 7 of %src1, with the splat as
; the first mul operand. The expected output does not use vmullb: it is
; vmovlt.s16 on the vector, sxth on the scalar, then vmul.i32 by r0.
define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Lanes 0, 2, 4, 6, 1, 3, 5, 7 of the <16 x i16> %src1 are sign-extended to
; <8 x i32> and multiplied by a sign-extended splat of %src2 (splat formed at
; i16 width, then extended). The expected output uses a vdup.16 of r0 and two
; vmullb.s16: one on q0 directly and one on a vrev32.16 copy of q0.
define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02461357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmullb.s16 q1, q1, q2
; CHECK-NEXT:    vmullb.s16 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out1, %out2
  ret <8 x i32> %out
}

; Sign-extended splat of %src2 (formed at i16 width, then extended) multiplied
; by the sign-extended lanes 0, 2, 4, 6, 1, 3, 5, 7 of the <16 x i16> %src1,
; with the splat as the first mul operand. The expected output uses a vdup.16
; of r0 and two vmullb.s16 with the splat register as first source.
define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vmullb.s16 q1, q2, q1
; CHECK-NEXT:    vmullb.s16 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out2, %out1
  ret <8 x i32> %out
}

; Lanes 0, 2, 4, 6, 1, 3, 5, 7 of the <16 x i16> %src1 are sign-extended to
; <8 x i32> and multiplied by a splat of %src2 that was sign-extended to i32
; as a scalar before splatting. The expected output does not use vmullb: it
; extends with vmovlb.s16 / vmovlt.s16 and multiplies each half by r0 (after
; sxth) with vmul.i32.
define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02461357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q1, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %out1, %shuf2
  ret <8 x i32> %out
}

; Splat of the scalar-sign-extended %src2 (extended to i32 before splatting)
; multiplied by the sign-extended lanes 0, 2, 4, 6, 1, 3, 5, 7 of the
; <16 x i16> %src1, with the splat as the first mul operand. The expected
; output does not use vmullb: it extends with vmovlb.s16 / vmovlt.s16 and
; multiplies each half by r0 (after sxth) with vmul.i32.
define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q1, q0
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.s16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %shuf2, %out1
  ret <8 x i32> %out
}

; Widening multiply of i16 lanes 0, 2, 4, 6 of %src1 by a splat of %src2, both
; zero-extended to i32. The splat is formed at i16 width and extended
; afterwards; the expected output is a vdup.32 of r0 followed by a single
; vmullb.u16.
define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02468101214_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Zero-extended splat of %src2 (formed at i16 width, then extended) multiplied
; by the zero-extended lanes 0, 2, 4, 6 of %src1, with the splat as the first
; mul operand. The expected output is a vdup.32 of r0 followed by a single
; vmullb.u16 with the splat register as its first source.
define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_0ext_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.u16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Lanes 0, 2, 4, 6 of %src1 are zero-extended to i32 and multiplied by a splat
; of %src2 that was zero-extended to i32 as a scalar before splatting. The
; expected output does not use vmullb: the vector is extended with vmovlb.u16,
; the scalar with uxth, and the product is a vmul.i32 with a scalar operand.
define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02468101214_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Multiply with the splat as the first mul operand: %src2 is zero-extended to
; i32 as a scalar, splatted at the wide width, and multiplied by the
; zero-extended even i16 lanes (mask 0,2,4,6) of %src1. The assertions expect
; vmovlb.u16 on the vector, uxth on the scalar, then a vmul.i32 that takes the
; scalar register directly.
define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_ext0_02468101214:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Lanes 0,2,4,6 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Widening multiply of the odd i16 lanes (mask 1,3,5,7) of %src1 by %src2.
; Both sides are zero-extended to i32 as vectors: the scalar is splatted at
; i16 width first and the splat is then extended. The splat is the second mul
; operand. The assertions expect vdup.32, a vrev32.16 on the source vector,
; and then a single vmullb.u16.
define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_13579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vmullb.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Lanes 1,3,5,7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i16 splat.
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; Widening multiply with the splat as the first mul operand: %src2 is splatted
; at i16 width and zero-extended as a vector, then multiplied by the
; zero-extended odd lanes (mask 1,3,5,7) of %src1. The assertions expect a
; vrev32.16 on the source vector, then vdup.32, then vmullb.u16 with the splat
; register as its first source.
define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_0ext_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q0, q0
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vmullb.u16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  ; Lanes 1,3,5,7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i16 splat.
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; Multiply of the zero-extended odd i16 lanes (mask 1,3,5,7) of %src1 by
; %src2, where the scalar is zero-extended to i32 before it is splatted at the
; wide width. The splat is the second mul operand. The assertions expect no
; vmull here: vmovlt.u16 on the vector, uxth on the scalar, then a vmul.i32
; that takes the scalar register directly.
define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_13579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Lanes 1,3,5,7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; Multiply with the splat as the first mul operand: %src2 is zero-extended to
; i32 as a scalar, splatted at the wide width, and multiplied by the
; zero-extended odd i16 lanes (mask 1,3,5,7) of %src1. The assertions expect
; vmovlt.u16 on the vector, uxth on the scalar, then a vmul.i32 that takes the
; scalar register directly.
define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_ext0_13579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Lanes 1,3,5,7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <4 x i16> %shuf1 to <4 x i32>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; Eight-lane widening multiply: lanes 0,2,4,6 followed by lanes 1,3,5,7 of the
; <16 x i16> %src1 (the mask only references its first eight elements), each
; zero-extended to i32, times a splat of %src2 that is zero-extended after
; being splatted. The splat is the second mul operand. The assertions expect
; vdup.16, a vrev32.16 copy of the source into q1, and two vmullb.u16
; instructions writing q1 and q0.
define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02461357_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vmullb.u16 q1, q1, q2
; CHECK-NEXT:    vmullb.u16 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..6 then odd lanes 1..7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i16 splat.
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out1, %out2
  ret <8 x i32> %out
}

; Eight-lane widening multiply with the splat as the first mul operand: %src2
; is splatted at i16 width and zero-extended as a vector, then multiplied by
; the zero-extended lanes 0,2,4,6,1,3,5,7 of the <16 x i16> %src1 (the mask
; only references its first eight elements). The assertions expect a vrev32.16
; copy of the source into q1, vdup.16, and two vmullb.u16 instructions that
; take the splat register as their first source.
define arm_aapcs_vfpcc <8 x i32> @zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_0ext_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vdup.16 q2, r0
; CHECK-NEXT:    vmullb.u16 q1, q2, q1
; CHECK-NEXT:    vmullb.u16 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..6 then odd lanes 1..7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i16 splat.
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out2, %out1
  ret <8 x i32> %out
}

; Eight-lane multiply of the zero-extended lanes 0,2,4,6,1,3,5,7 of the
; <16 x i16> %src1 (the mask only references its first eight elements) by
; %src2, where the scalar is zero-extended to i32 before it is splatted at the
; wide width. The splat is the second mul operand. The assertions expect no
; vmull here: vmovlb.u16 and vmovlt.u16 each feed a vmul.i32 by the uxth'd
; scalar register, and a final vmov places the first product in q0.
define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02461357_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..6 then odd lanes 1..7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %out1, %shuf2
  ret <8 x i32> %out
}

; Eight-lane multiply with the splat as the first mul operand: %src2 is
; zero-extended to i32 as a scalar, splatted at the wide width, and multiplied
; by the zero-extended lanes 0,2,4,6,1,3,5,7 of the <16 x i16> %src1 (the mask
; only references its first eight elements). The assertions expect vmovlb.u16
; and vmovlt.u16 each feeding a vmul.i32 by the uxth'd scalar register, and a
; final vmov that places the first product in q0.
define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_ext0_02461357:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q0
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    vmul.i32 q2, q1, r0
; CHECK-NEXT:    vmovlt.u16 q0, q0
; CHECK-NEXT:    vmul.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..6 then odd lanes 1..7 of %src1, widened to i32 by zext.
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = zext <8 x i16> %shuf1 to <8 x i32>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %shuf2, %out1
  ret <8 x i32> %out
}

; Widening multiply of the even i8 lanes (mask 0,2,...,14) of %src1 by %src2.
; Both sides are sign-extended to i16 as vectors: the scalar is splatted at i8
; width first and the splat is then extended. The splat is the second mul
; operand. The assertions expect a vdup.16 feeding a single vmullb.s8.
define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_024681012141618202224262830_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then sext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Widening multiply with the splat as the first mul operand: %src2 is splatted
; at i8 width and sign-extended as a vector, then multiplied by the
; sign-extended even lanes (mask 0,2,...,14) of %src1. The assertions expect
; vdup.16 followed by vmullb.s8 with the splat register as its first source.
define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0ext_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.s8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then sext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Multiply of the sign-extended even i8 lanes (mask 0,2,...,14) of %src1 by
; %src2, where the scalar is sign-extended to i16 before it is splatted at the
; wide width. The splat is the second mul operand. The assertions expect no
; vmull here: vmovlb.s8 on the vector, sxtb on the scalar, then a vmul.i16
; that takes the scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_024681012141618202224262830_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Multiply with the splat as the first mul operand: %src2 is sign-extended to
; i16 as a scalar, splatted at the wide width, and multiplied by the
; sign-extended even i8 lanes (mask 0,2,...,14) of %src1. The assertions
; expect vmovlb.s8 on the vector, sxtb on the scalar, then a vmul.i16 that
; takes the scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_ext0_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Widening multiply of the odd i8 lanes (mask 1,3,...,15) of %src1 by %src2.
; Both sides are sign-extended to i16 as vectors: the scalar is splatted at i8
; width first and the splat is then extended. The splat is the second mul
; operand. The assertions expect vdup.16, a vrev16.8 on the source vector, and
; then a single vmullb.s8.
define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_135791113151719212325272931_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vmullb.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then sext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Widening multiply with the splat as the first mul operand: %src2 is splatted
; at i8 width and sign-extended as a vector, then multiplied by the
; sign-extended odd lanes (mask 1,3,...,15) of %src1. The assertions expect a
; vrev16.8 on the source vector, then vdup.16, then vmullb.s8 with the splat
; register as its first source.
define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0ext_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.s8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then sext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Multiply of the sign-extended odd i8 lanes (mask 1,3,...,15) of %src1 by
; %src2, where the scalar is sign-extended to i16 before it is splatted at the
; wide width. The splat is the second mul operand. The assertions expect no
; vmull here: vmovlt.s8 on the vector, sxtb on the scalar, then a vmul.i16
; that takes the scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_135791113151719212325272931_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Multiply with the splat as the first mul operand: %src2 is sign-extended to
; i16 as a scalar, splatted at the wide width, and multiplied by the
; sign-extended odd i8 lanes (mask 1,3,...,15) of %src1. The assertions expect
; vmovlt.s8 on the vector, sxtb on the scalar, then a vmul.i16 that takes the
; scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_ext0_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Sixteen-lane widening multiply: even lanes 0..14 followed by odd lanes 1..15
; of the <32 x i8> %src1 (the mask only references its first sixteen
; elements), each sign-extended to i16, times a splat of %src2 that is
; sign-extended after being splatted. The splat is the second mul operand. The
; assertions expect vdup.8, a vrev16.8 copy of the source into q1, and two
; vmullb.s8 instructions writing q1 and q0.
define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0246810121413579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmullb.s8 q1, q1, q2
; CHECK-NEXT:    vmullb.s8 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then sext the i8 splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = sext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out1, %out2
  ret <16 x i16> %out
}

; Sixteen-lane widening multiply with the splat as the first mul operand:
; %src2 is splatted at i8 width and sign-extended as a vector, then multiplied
; by the sign-extended even lanes 0..14 followed by odd lanes 1..15 of the
; <32 x i8> %src1 (the mask only references its first sixteen elements). The
; assertions expect a vrev16.8 copy of the source into q1, vdup.8, and two
; vmullb.s8 instructions that take the splat register as their first source.
define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0ext_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vmullb.s8 q1, q2, q1
; CHECK-NEXT:    vmullb.s8 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then sext the i8 splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = sext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out2, %out1
  ret <16 x i16> %out
}

; Sixteen-lane multiply of the sign-extended even lanes 0..14 followed by odd
; lanes 1..15 of the <32 x i8> %src1 (the mask only references its first
; sixteen elements) by %src2, where the scalar is sign-extended to i16 before
; it is splatted at the wide width. The splat is the second mul operand. The
; assertions expect no vmull here: vmovlb.s8 and vmovlt.s8 each feed a
; vmul.i16 by the sxtb'd scalar register, and a final vmov places the first
; product in q0.
define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_0246810121413579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q1, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %out1, %shuf2
  ret <16 x i16> %out
}

; Sixteen-lane multiply with the splat as the first mul operand: %src2 is
; sign-extended to i16 as a scalar, splatted at the wide width, and multiplied
; by the sign-extended even lanes 0..14 followed by odd lanes 1..15 of the
; <32 x i8> %src1 (the mask only references its first sixteen elements). The
; assertions expect vmovlb.s8 and vmovlt.s8 each feeding a vmul.i16 by the
; sxtb'd scalar register, and a final vmov that places the first product in
; q0.
define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: sext8_ext0_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q1, q0
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.s8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by sext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = sext <16 x i8> %shuf1 to <16 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = sext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %shuf2, %out1
  ret <16 x i16> %out
}

; Widening multiply of the even i8 lanes (mask 0,2,...,14) of %src1 by %src2.
; Both sides are zero-extended to i16 as vectors: the scalar is splatted at i8
; width first and the splat is then extended. The splat is the second mul
; operand. The assertions expect a vdup.16 feeding a single vmullb.u8.
define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_024681012141618202224262830_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Widening multiply with the splat as the first mul operand: %src2 is splatted
; at i8 width and zero-extended as a vector, then multiplied by the
; zero-extended even lanes (mask 0,2,...,14) of %src1. The assertions expect
; vdup.16 followed by vmullb.u8 with the splat register as its first source.
define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0ext_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.u8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Multiply of the zero-extended even i8 lanes (mask 0,2,...,14) of %src1 by
; %src2, where the scalar is zero-extended to i16 before it is splatted at the
; wide width. The splat is the second mul operand. The assertions expect no
; vmull here: vmovlb.u8 on the vector, uxtb on the scalar, then a vmul.i16
; that takes the scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_024681012141618202224262830_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Multiply with the splat as the first mul operand: %src2 is zero-extended to
; i16 as a scalar, splatted at the wide width, and multiplied by the
; zero-extended even i8 lanes (mask 0,2,...,14) of %src1. The assertions
; expect vmovlb.u8 on the vector, uxtb on the scalar, then a vmul.i16 that
; takes the scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_ext0_024681012141618202224262830:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Widening multiply of the odd i8 lanes (mask 1,3,...,15) of %src1 by %src2.
; Both sides are zero-extended to i16 as vectors: the scalar is splatted at i8
; width first and the splat is then extended. The splat is the second mul
; operand. The assertions expect vdup.16, a vrev16.8 on the source vector, and
; then a single vmullb.u8.
define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_135791113151719212325272931_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vmullb.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out1, %out2
  ret <8 x i16> %out
}

; Widening multiply with the splat as the first mul operand: %src2 is splatted
; at i8 width and zero-extended as a vector, then multiplied by the
; zero-extended odd lanes (mask 1,3,...,15) of %src1. The assertions expect a
; vrev16.8 on the source vector, then vdup.16, then vmullb.u8 with the splat
; register as its first source.
define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0ext_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q0, q0
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vmullb.u8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i8 splat.
  %ins = insertelement <16 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer
  %out2 = zext <8 x i8> %shuf2 to <8 x i16>
  %out = mul <8 x i16> %out2, %out1
  ret <8 x i16> %out
}

; Multiply of the zero-extended odd i8 lanes (mask 1,3,...,15) of %src1 by
; %src2, where the scalar is zero-extended to i16 before it is splatted at the
; wide width. The splat is the second mul operand. The assertions expect no
; vmull here: vmovlt.u8 on the vector, uxtb on the scalar, then a vmul.i16
; that takes the scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_135791113151719212325272931_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %out1, %shuf2
  ret <8 x i16> %out
}

; Multiply with the splat as the first mul operand: %src2 is zero-extended to
; i16 as a scalar, splatted at the wide width, and multiplied by the
; zero-extended odd i8 lanes (mask 1,3,...,15) of %src1. The assertions expect
; vmovlt.u8 on the vector, uxtb on the scalar, then a vmul.i16 that takes the
; scalar register directly.
define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_ext0_135791113151719212325272931:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q0, q0, r0
; CHECK-NEXT:    bx lr
entry:
  ; Odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <8 x i8> %shuf1 to <8 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <8 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i16> %shuf2, %out1
  ret <8 x i16> %out
}

; Sixteen-lane widening multiply: even lanes 0..14 followed by odd lanes 1..15
; of the <32 x i8> %src1 (the mask only references its first sixteen
; elements), each zero-extended to i16, times a splat of %src2 that is
; zero-extended after being splatted. The splat is the second mul operand. The
; assertions expect vdup.8, a vrev16.8 copy of the source into q1, and two
; vmullb.u8 instructions writing q1 and q0.
define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0246810121413579111315_0ext:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vmullb.u8 q1, q1, q2
; CHECK-NEXT:    vmullb.u8 q0, q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i8 splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = zext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out1, %out2
  ret <16 x i16> %out
}

; Sixteen-lane widening multiply with the splat as the first mul operand:
; %src2 is splatted at i8 width and zero-extended as a vector, then multiplied
; by the zero-extended even lanes 0..14 followed by odd lanes 1..15 of the
; <32 x i8> %src1 (the mask only references its first sixteen elements). The
; assertions expect a vrev16.8 copy of the source into q1, vdup.8, and two
; vmullb.u8 instructions that take the splat register as their first source.
define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0ext_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vdup.8 q2, r0
; CHECK-NEXT:    vmullb.u8 q1, q2, q1
; CHECK-NEXT:    vmullb.u8 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Broadcast %src2 with an all-zero shuffle mask, then zext the i8 splat.
  %ins = insertelement <32 x i8> poison, i8 %src2, i32 0
  %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer
  %out2 = zext <16 x i8> %shuf2 to <16 x i16>
  %out = mul <16 x i16> %out2, %out1
  ret <16 x i16> %out
}

; Sixteen-lane multiply of the zero-extended even lanes 0..14 followed by odd
; lanes 1..15 of the <32 x i8> %src1 (the mask only references its first
; sixteen elements) by %src2, where the scalar is zero-extended to i16 before
; it is splatted at the wide width. The splat is the second mul operand. The
; assertions expect no vmull here: vmovlb.u8 and vmovlt.u8 each feed a
; vmul.i16 by the uxtb'd scalar register, and a final vmov places the first
; product in q0.
define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_0246810121413579111315_ext0:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %out1, %shuf2
  ret <16 x i16> %out
}

; Sixteen-lane multiply with the splat as the first mul operand: %src2 is
; zero-extended to i16 as a scalar, splatted at the wide width, and multiplied
; by the zero-extended even lanes 0..14 followed by odd lanes 1..15 of the
; <32 x i8> %src1 (the mask only references its first sixteen elements). The
; assertions expect vmovlb.u8 and vmovlt.u8 each feeding a vmul.i16 by the
; uxtb'd scalar register, and a final vmov that places the first product in
; q0.
define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) {
; CHECK-LABEL: zext8_ext0_0246810121413579111315:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q0
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    vmul.i16 q2, q1, r0
; CHECK-NEXT:    vmovlt.u8 q0, q0
; CHECK-NEXT:    vmul.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  ; Even lanes 0..14 then odd lanes 1..15 of %src1, widened to i16 by zext.
  %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %out1 = zext <16 x i8> %shuf1 to <16 x i16>
  ; Extend %src2 as a scalar, then broadcast it with an all-zero shuffle mask.
  %ext = zext i8 %src2 to i16
  %ins = insertelement <16 x i16> poison, i16 %ext, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer
  %out = mul <16 x i16> %shuf2, %out1
  ret <16 x i16> %out
}
